Importing the Libraries

library(tidyverse)
library(janitor)
library(skimr)
library(DT)

library(dplyr, warn.conflicts = FALSE)
# Suppress summarise info
options(dplyr.summarise.inform = FALSE)

Importing the data

# Read the Airbnb price CSV (path relative to the working directory) into a
# tibble. No col_types are supplied, so readr guesses every column's type from
# the file contents.
data <- read_csv("airbnb prices.csv")

Data

# Print the structure of the imported table: dimensions, column names, column
# types and the first few values of each column.
str(data)
## spec_tbl_df [18,723 x 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ room_id             : num [1:18723] 10176931 8935871 14011697 6137978 18630616 ...
##  $ survey_id           : num [1:18723] 1476 1476 1476 1476 1476 ...
##  $ host_id             : num [1:18723] 49180562 46718394 10346595 8685430 70191803 ...
##  $ room_type           : chr [1:18723] "Shared room" "Shared room" "Shared room" "Shared room" ...
##  $ country             : logi [1:18723] NA NA NA NA NA NA ...
##  $ city                : chr [1:18723] "Amsterdam" "Amsterdam" "Amsterdam" "Amsterdam" ...
##  $ borough             : logi [1:18723] NA NA NA NA NA NA ...
##  $ neighborhood        : chr [1:18723] "De Pijp / Rivierenbuurt" "Centrum West" "Watergraafsmeer" "Centrum West" ...
##  $ reviews             : num [1:18723] 7 45 1 7 1 184 67 2 2 0 ...
##  $ overall_satisfaction: num [1:18723] 4.5 4.5 0 5 0 4.5 5 0 0 0 ...
##  $ accommodates        : num [1:18723] 2 4 3 4 2 2 16 2 2 12 ...
##  $ bedrooms            : num [1:18723] 1 1 1 1 1 1 1 1 1 1 ...
##  $ bathrooms           : logi [1:18723] NA NA NA NA NA NA ...
##  $ price               : num [1:18723] 156 126 132 121 93 102 462 414 222 301 ...
##  $ minstay             : logi [1:18723] NA NA NA NA NA NA ...
##  $ name                : chr [1:18723] "Red Light/ Canal view apartment (Shared)" "Sunny and Cozy Living room in quite neighbours" "Amsterdam" "Canal boat RIDE in Amsterdam" ...
##  $ last_modified       : POSIXct[1:18723], format: "2017-07-23 13:06:27" "2017-07-23 13:06:23" ...
##  $ latitude            : num [1:18723] 52.4 52.4 52.3 52.4 52.4 ...
##  $ longitude           : num [1:18723] 4.89 4.9 4.94 4.89 4.85 ...
##  $ location            : chr [1:18723] "0101000020E610000033FAD170CA8C13403BC5AA41982D4A40" "0101000020E6100000842A357BA095134042791F4773304A40" "0101000020E6100000A51133FB3CC613403543AA285E2B4A40" "0101000020E6100000DF180280638F134085EE92382B304A40" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   room_id = col_double(),
##   ..   survey_id = col_double(),
##   ..   host_id = col_double(),
##   ..   room_type = col_character(),
##   ..   country = col_logical(),
##   ..   city = col_character(),
##   ..   borough = col_logical(),
##   ..   neighborhood = col_character(),
##   ..   reviews = col_double(),
##   ..   overall_satisfaction = col_double(),
##   ..   accommodates = col_double(),
##   ..   bedrooms = col_double(),
##   ..   bathrooms = col_logical(),
##   ..   price = col_double(),
##   ..   minstay = col_logical(),
##   ..   name = col_character(),
##   ..   last_modified = col_datetime(format = ""),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   location = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Top "earning" hosts: the listed prices of all rows belonging to a host are
# summed, and hosts are ordered from the largest total to the smallest.
host_earning <- arrange(
  summarise(group_by(data, host_id), total_earning = sum(price)),
  desc(total_earning)
)

print(host_earning)
## # A tibble: 15,943 x 2
##      host_id total_earning
##        <dbl>         <dbl>
##  1  48703385         29493
##  2   1464510         11397
##  3   8558897         11300
##  4 113977564         10098
##  5    517215          9143
##  6 107745142          8622
##  7  65859990          8581
##  8  46691672          7779
##  9  84453740          7412
## 10  22855069          6000
## # ... with 15,933 more rows
# Render the ten highest totals as a table.
# head() is used rather than host_earning[1:10, ] so that an input with fewer
# than ten hosts yields only the rows that exist instead of indexing past the
# end of the table (which produces all-NA filler rows).
knitr::kable(
  head(host_earning, 10),
  caption = "A knitr kable."
)
A knitr kable.
host_id total_earning
48703385 29493
1464510 11397
8558897 11300
113977564 10098
517215 9143
107745142 8622
65859990 8581
46691672 7779
84453740 7412
22855069 6000
# Number of rows for every (neighborhood, room_type) combination.
# .groups = "keep" leaves the result grouped by both columns.
rooms <- data %>%
  group_by(neighborhood, room_type) %>%
  summarise(n = n(), .groups = "keep")

print(rooms)
## # A tibble: 61 x 3
## # Groups:   neighborhood, room_type [61]
##    neighborhood           room_type           n
##    <chr>                  <chr>           <int>
##  1 Bijlmer Centrum        Entire home/apt    44
##  2 Bijlmer Centrum        Private room       54
##  3 Bijlmer Centrum        Shared room         1
##  4 Bijlmer Oost           Entire home/apt    45
##  5 Bijlmer Oost           Private room       52
##  6 Bos en Lommer          Entire home/apt   836
##  7 Bos en Lommer          Private room      149
##  8 Bos en Lommer          Shared room         3
##  9 Buitenveldert / Zuidas Entire home/apt   184
## 10 Buitenveldert / Zuidas Private room       64
## # ... with 51 more rows
# Faceted bar chart of `rooms`: one panel per neighbourhood, one bar per room
# type, with the count printed just above each bar. Room types are identified
# by the fill colour, so the x axis title, labels and ticks are blanked out.
ggplot(rooms, aes(room_type, n, fill = room_type)) +
  geom_col() +
  geom_text(aes(label = n), size = 3, vjust = -0.3) +
  facet_wrap(~neighborhood) +
  scale_y_continuous(limits = c(0, 3000)) +
  ggtitle("Room Type Vs No. of Bookings (Grouped by Neighbourhood)") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# Number of rows per neighbourhood, largest count first.
# .groups = "keep" leaves the result grouped by neighborhood.
neighborhood_bookings <- data %>%
  group_by(neighborhood) %>%
  summarise(n = n(), .groups = "keep") %>%
  arrange(desc(n))

print(neighborhood_bookings)
## # A tibble: 23 x 2
## # Groups:   neighborhood [23]
##    neighborhood                               n
##    <chr>                                  <int>
##  1 De Baarsjes / Oud West                  3289
##  2 De Pijp / Rivierenbuurt                 2378
##  3 Centrum West                            2225
##  4 Centrum Oost                            1730
##  5 Westerpark                              1430
##  6 Noord-West / Noord-Midden               1418
##  7 Oud Oost                                1169
##  8 Bos en Lommer                            988
##  9 Oostelijk Havengebied / Indische Buurt   921
## 10 Watergraafsmeer                          517
## # ... with 13 more rows
# Horizontal bar chart of the per-neighbourhood counts. Neighbourhoods are
# ordered along the y axis by their count, each bar is labelled with its value,
# and the x range is widened to 3500 so the labels fit to the right of the bars.
ggplot(neighborhood_bookings, aes(n, reorder(neighborhood, n), fill = n)) +
  geom_col() +
  geom_text(aes(label = n), size = 3.2, hjust = -0.2) +
  scale_x_continuous(limits = c(0, 3500)) +
  theme_minimal()

library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(dplyr)
library(formattable)
## Warning: package 'formattable' was built under R version 4.1.2
library(tidyr)

# Colour constants for the formatted table: a pale and a stronger shade each of
# green and red, given as hex strings.
customGreen0 <- "#DeF7E9"
customGreen <- "#71CA97"
customRed0 <- "#ffdfdf"
customRed <- "#ff7f7f"

# Mean listed price per room type.
a1 <- data %>%
  group_by(room_type) %>%
  summarise(price = mean(price))

# Number of rows per room type.
a2 <- data %>%
  group_by(room_type) %>%
  count(room_type)

# Combine both summaries into one data frame (room_type, price, n).
# The join key is spelled out instead of relying on merge() picking up
# whichever column names the two tables happen to share.
room_avg_price <- merge(a1, a2, by = "room_type")

# Styled table: room_type in bold grey, `n` on a green colour tile and `price`
# on a red colour tile.
# `align` carries exactly one entry per column of room_avg_price; the previous
# five-entry vector did not match this three-column table.
formattable(room_avg_price, align = c("l", "c", "c"), list(
  `room_type` = formatter(
    "span",
    style = ~ style(color = "grey", font.weight = "bold")
  ),
  `n` = color_tile(customGreen0, customGreen),
  `price` = color_tile(customRed0, customRed)
))
room_type price n
Entire home/apt 181.5471 14978
Private room 106.8705 3682
Shared room 103.6825 63